Israel Palestine Conflict

by Partha Lalit and Sumaiya Khan

knitr::opts_chunk$set(echo = T)

Import Libraries and Data Set

library(DT)
library(highcharter)
library(dplyr)
library(car)
library(visdat)
library(lubridate)
library(gridExtra)

# timeseries
library(forecast)
library(tseries)
library(fpp2)
library(lmtest)
library(tidyr)
library(padr)
library(imputeTS)
# heatmap
library(ggplot2)
library(sf)
library(rvest)
library(plotly)
library(viridis)
library(ggrepel)
library(ggthemes)

#cowplot
library(cowplot)
#WordCloud
library(ggwordcloud)

conflict= read.csv('C:/Users/12par/OneDrive/Desktop/isr_pse_conflict_20_copy.csv')

Data Cleaning and Manipulation

#Converting blank values to NA values

conflict[conflict == ''] <- NA
  #View(conflict)
vis_miss(conflict)

#na_count <- sum(is.na(conflict))
#print(na_count)
#colSums(is.na(conflict))
#rowSums(is.na(conflict))

conflict <- conflict %>%
  select(-ammunition, -took_part_in_the_hostilities)

#colSums(is.na(data1))

conflict <- conflict[complete.cases(conflict), ]
#View(data_clean)
#na_count <- sum(is.na(data_clean))
#print(na_count)

vis_miss(conflict)

#Total values omitted  457

#Convert date format from dmy to Y
df <- conflict %>% 
  mutate(NEW_FORMAT = parse_date_time(date_of_event, 
                                      orders = c("mdy", "dmy", "ymd"))) %>% 
  mutate(DESIRED_FORMAT = format.Date(NEW_FORMAT,  "%Y"))

  #View(df)


# Month Year format
df2 <- df %>% 
  mutate(NEW_FORMAT = parse_date_time(date_of_event, 
                                      orders = c("mdy", "dmy", "ymd"))) %>% 
  mutate(DESIRED_FORMAT2 = format.Date(NEW_FORMAT,  "%b %Y"))

  #View(df2)

# Spliting age into age groups
df3= df2 %>% 
  mutate(
    # Create categories
    age_group = dplyr::case_when(
      age <= 18            ~ "0-18",
      age > 18 & age <= 36 ~ "19-36",
      age > 36 & age <= 54 ~ "37-54",
      age > 54             ~ "> 54" 
      ) ,
    # Convert to factor
    age_group = factor(
      age_group,
      level = c("0-18", "19-36","37-54", "> 54")
    )
  )
  #View(df3)

# Omitting the NA valued data rows
df_clean= select(df3,-c(NEW_FORMAT))
  #View(df_clean)   
  #vis_miss(df_clean)

# Exporting a clean excel file for future reference

 #library(writexl)
  #write_xlsx( df_clean,"C:/Users/12par/Downloads/maindf.xlsx")

Graphical Insights

custom_colors <- c("#454568", "#785956")

df_clean%>%
  group_by(gender) %>%
  summarise(count=n()) %>%
  # arrange(desc(Rating)) %>%  # Sort by count in descending order
  # head(5) %>%  
  hchart('pie', hcaes(x = gender, y = count, color = custom_colors)) %>%
  hc_add_theme(hc_theme_google()) %>%
  hc_tooltip(pointFormat = '<b> Count </b>: {point.y} <br>') %>%  
  hc_title(text = "Genderwise Count")
df_clean%>%
  group_by(age_group) %>%
  summarise(count=n()) %>%
  # arrange(desc(Rating)) %>%  # Sort by count in descending order
  # head(5) %>%  
  hchart('column', hcaes(x = age_group, y = count, color = count)) %>%
  hc_add_theme(hc_theme_google()) %>%
  hc_tooltip(pointFormat = '<b> Count </b>: {point.y} <br>') %>%  
  hc_title(text = "Age Groups")
custom_colors <- c("#005EB8", "#000000")

df_clean%>%
  group_by(citizenship) %>%
  summarise(count=n()) %>%
  # arrange(desc(Rating)) %>%  # Sort by count in descending order
  # head(5) %>%  
  hchart('column', hcaes(x = citizenship, y = count, color = custom_colors)) %>%
  hc_add_theme(hc_theme_google()) %>%
  hc_tooltip(pointFormat = '<b> Count </b>: {point.y} <br>') %>%  
  hc_title(text = "Countrywise Citizenship Count")
custom_colors <- c("#EE2A35","#005EB8", "#000000")

df_clean%>%
  group_by(killed_by) %>%
  summarise(count=n()) %>%
  # arrange(desc(Rating)) %>%  # Sort by count in descending order
  # head(5) %>%  
  hchart('column', hcaes(x = killed_by, y = count, color = custom_colors)) %>%
  hc_add_theme(hc_theme_google()) %>%
  hc_tooltip(pointFormat = '<b> Count </b>: {point.y} <br>') %>%  
  hc_title(text = "Killed By Counts")
df_clean%>%
  group_by(DESIRED_FORMAT) %>%
  summarise(count=n()) %>%
  # arrange(desc(Rating)) %>%  # Sort by count in descending order
  # head(5) %>%  
  hchart('column', hcaes(x = DESIRED_FORMAT, y = count, color = count)) %>%
  hc_add_theme(hc_theme_google()) %>%
  hc_tooltip(pointFormat = '<b> Count </b>: {point.y} <br>') %>%  
  hc_title(text = "Year Wise Count of Deaths")
#h1= df_clean%>%
 # group_by(type_of_injury) %>%
  #summarise(count = n()) %>%
  #arrange(desc(count)) %>%  
  #head(5) %>%  
  #hchart('pie', hcaes(x =type_of_injury, y = count, color = count)) %>%
  #hc_add_theme(hc_theme_google()) %>%
  #hc_tooltip(pointFormat = '<b> Count </b>: {point.y} <br>') %>%  
  #hc_title(text = "Type of Injuries")

# h1
set.seed(1000)
w = df_clean %>% group_by(type_of_injury) %>%
  summarize(count = n())
#View(w)
ggplot(w, aes(label = w$type_of_injury, size = w$count, color= w$count)) +
  geom_text_wordcloud() +
  scale_size_area(max_size = 50) +
  theme_minimal() +
  scale_color_gradient(low = "darkred", high = "maroon")

Heat Maps

#Importing Shape Files
is.sh1= read_sf('C:\\Users\\12par\\Downloads\\ISR_adm\\ISR_adm1.shp')
pa.sh1= read_sf('C:\\Users\\12par\\Downloads\\PSE_adm\\PSE_adm2.shp')
#View(is.sh1)
#View(pa.sh1)
# Changing Pivot table column names to shape file column names


###PALESTINE DEATHS

#                 Total Palestine Deaths
prw=  df_clean %>% group_by(event_location_region, event_location_district) %>%
  summarize(count = n())

#View(prw)

#colnames(prw)[1] <- "NAME_1"
colnames(prw)[2] <- "NAME_2"
#View(prw)

#Merging Palestine shape file with Pivot table
map.prw= merge(pa.sh1, prw, by= "NAME_2" )
#View(map.prw)

points.prw= cbind(map.prw, st_coordinates(st_centroid(map.prw$geometry)))



#               Israeli Citizens Dead in Palestine
pa.icp = df_clean %>% group_by(df_clean$citizenship,
                               df_clean$event_location_district) %>%
  filter(citizenship =="Israeli") %>%  summarize(count = n())
#View(pa.icp)

#colnames(pa.deaths)[1] <- "NAME_0"
colnames(pa.icp)[2] <- "NAME_2"
#View(pa.icp)

map.icp = merge(pa.sh1, pa.icp, by= "NAME_2", )

#                 Palestinian Citizens Dead in Palestine
pa.pcp = df_clean %>% group_by(df_clean$citizenship,
                               df_clean$event_location_district) %>%
  filter(citizenship =="Palestina") %>%  summarize(count = n())
#View(pa.pcp)

#colnames(pa.deaths)[1] <- "NAME_0"
colnames(pa.pcp)[2] <- "NAME_2"
#View(pa.pcp)

map.pcp= merge(pa.sh1, pa.pcp, by= "NAME_2")


info= paste0(
  '\nName:', map.prw$NAME_2,
  '\nTotal Death Count: ', map.prw$count,
  '\nPalestinians dead in Palestine : ', map.pcp$count,
  '\nIsraelis dead in Palestine : ', map.icp$count
)
map.prw$info = info

pa.pd= ggplot(map.prw)+ geom_sf(aes(fill= map.prw$count, label= info),
                                color= 'white', size= 1)+
  ggtitle("Deaths in Palestine")+
  geom_text(data= points.prw, aes(x= points.prw$X, y= points.prw$Y, 
                                  label= paste(points.prw$NAME_2)),
            color= 'Red', vjust= -2)+
  scale_fill_viridis_c(option = 'A', trans= 'sqrt')+
  labs(x = "Longitude", y = "Latitude")+ theme(
  plot.margin = margin(1, 1, 1, 1, "cm"))

fig1= ggplotly(pa.pd) %>%
  layout(autosize = F, width = 750, height = 750)
fig1
# ISRAEL DEATHS
#                  Total Israel Deaths
irw =  df_clean %>% group_by(event_location,
                             event_location_region) %>% 
  filter(event_location_region == "Israel") %>%
  summarize(count = n())
#View(irw)

colnames(irw)[1] <- "NAME_1"
#View(irw)

map.irw= merge(is.sh1, irw, by= "NAME_1")
#View(map.irw)

points.irw= cbind(map.irw, st_coordinates(st_centroid(map.irw$geometry)))
#View(points.irw)

#                         Israeli citizens dead in Israel
is.ici = df_clean %>% group_by(
  df_clean$event_location,
  df_clean$event_location_region,
  df_clean$citizenship) %>%
  filter( event_location_region =='Israel', citizenship== 'Israeli') %>%  
  summarize(count = n())
#View(is.ici)

colnames(is.ici)[1] <- "NAME_1"
#View(is.ici)

map.ici= merge(is.sh1, is.ici, by= "NAME_1")
#View(map.ici)

#                  Palestine Citizens Dead in Israel

is.pci = df_clean %>% group_by(
  df_clean$event_location,
  df_clean$event_location_region,
  df_clean$citizenship) %>%
  filter( event_location_region =='Israel', citizenship== 'Palestina') %>%  
  summarize(count = n())
#View(is.pci)

colnames(is.pci)[1] <- "NAME_1"
#View(is.pci)

map.pci= merge(is.sh1, is.pci, by= "NAME_1")
#View(map.pci)

info1= paste0(
  '\nName: ', map.irw$NAME_1,
  '\nTotal Death Count: ', map.irw$count,
  '\nIsraeli dead in Israel: ', map.ici$count,
  '\nPalestinians dead in Israel: ', map.pci$count
)
map.irw$info1 = info1


itd = ggplot(map.irw)+ geom_sf(aes(fill= map.irw$count, label= info1), 
                               color= 'white', size= 5)+
  scale_fill_viridis(option = 'B', trans= 'sqrt')+
  ggtitle("Israeli Deaths")+
  geom_text(data= points.irw, aes(x= points.irw$X, y= points.irw$Y, 
                                  label= paste(points.irw$NAME_1)),
            nudge_x = c(0, -0.4, 0.6, 0, 0.3, -0.4), nudge_y = c(0, 0, 0,0, 0,0),
            color= 'Red')+
  labs(x = "Longitude", y = "Latitude")+
  theme(
    panel.background = element_rect(fill = 'grey', size= 0.5,
                                    linetype = 0, color = 'black'),
  )+ theme(
  plot.margin = margin(1, 1, 1, 1, "cm"))

fig= ggplotly(itd) %>%
  layout(autosize = F, width = 750, height = 750)
fig

Time Series

yrwise= df_clean %>% group_by(DESIRED_FORMAT2) %>%
  mutate(DESIRED_FORMAT2 = as.Date(paste0(DESIRED_FORMAT2, " 01"), format = "%b %Y %d")) %>%
  arrange(DESIRED_FORMAT2)   %>%
  #arrange(month(DESIRED_FORMAT2)) %>% 
  summarize(count = n())
#View(yrwise)


# Find the minimum and maximum dates in yrwise$DESIRED_FORMAT2
min_date <- min(yrwise$DESIRED_FORMAT2)
max_date <- max(yrwise$DESIRED_FORMAT2)

# Create a sequence of complete monthly dates
complete_dates <- seq(from = min_date, to = max_date, by = "month")
complete_dates_df <- data.frame(DESIRED_FORMAT2 = complete_dates)

# Merge yrwise with the complete sequence of dates
yrwise <- merge(yrwise, complete_dates_df, by = "DESIRED_FORMAT2", all = TRUE)

#View(yrwise)

yrwise <- na_kalman(yrwise) 
#View(yrwise)

# plotting the graph
mts <- ts(yrwise$count, start = c(2000,10), end = c(2023,09), frequency = 12)
#mts

p1= ts.plot(mts, main = "Time Series Plot", xlab = "Time", ylab = "Count of Deaths")

abline(reg=lm(mts~time(mts)))

adf.test(mts)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  mts
## Dickey-Fuller = -6.3336, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
arima_model=auto.arima(mts)

print(arima_model)
## Series: mts 
## ARIMA(0,0,1) with non-zero mean 
## 
## Coefficients:
##          ma1     mean
##       0.3773  39.9151
## s.e.  0.0575   9.4919
## 
## sigma^2 = 13231:  log likelihood = -1700.36
## AIC=3406.73   AICc=3406.82   BIC=3417.59
par(mfrow=c(2,1))
acf_result= acf(mts, lag.max = 12, main = "Autocorrelation Function", 
                xlab = "Lag", ylab = "ACF")
pacf_result= pacf(mts, lag.max = 12, main = "Partial Autocorrelation Function",
                  xlab = "Lag", ylab = "PACF")

forecast_values <- forecast(mts, h=12)
#par(mfrow=c(1,1))
plot(forecast_values, main="ARIMA Forecast",
     xlab="Time", ylab="Count of Deaths")

Data Set

datatable(df_clean, options = list(
  scrollX = TRUE,  # Enable horizontal scrolling if necessary
  scrollY = '500px',  # Set the maximum height of the table
  scrollCollapse = TRUE  # Collapse table borders when scrollbar is present
))
## Warning in instance$preRenderHook(instance): It seems your data is too big for
## client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html

END.